{
 "cells": [
  {
   "cell_type": "raw",
   "id": "b561e40e",
   "metadata": {},
   "source": [
    "特征降维:只能处理数值类型的数据\n",
    "   PCA：无监督学习算法\n",
    "   LDA：有监督学习算法。\n",
    "   KPCA\n",
    "   Isomap\n",
    "   LLE"
   ]
  },
  {
   "cell_type": "raw",
   "id": "84ed3359",
   "metadata": {},
   "source": [
    "维度(列)太高所带来的问题：\n",
    "    1.会导致建模速度变慢\n",
    "    2.会导致出现稀疏矩阵等。\n",
    "所以就需要通过一些技术来减少数据的列数。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5f1e66c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "69d5e246",
   "metadata": {},
   "source": [
    "1.读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f77dbe7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>school</th>\n",
       "      <th>sex</th>\n",
       "      <th>age</th>\n",
       "      <th>address</th>\n",
       "      <th>famsize</th>\n",
       "      <th>Pstatus</th>\n",
       "      <th>Medu</th>\n",
       "      <th>Fedu</th>\n",
       "      <th>Mjob</th>\n",
       "      <th>...</th>\n",
       "      <th>famrel</th>\n",
       "      <th>freetime</th>\n",
       "      <th>goout</th>\n",
       "      <th>Dalc</th>\n",
       "      <th>Walc</th>\n",
       "      <th>health</th>\n",
       "      <th>absences</th>\n",
       "      <th>G1</th>\n",
       "      <th>G2</th>\n",
       "      <th>G3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>GP</td>\n",
       "      <td>Man</td>\n",
       "      <td>18</td>\n",
       "      <td>U</td>\n",
       "      <td>GT3</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>at_home</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>10</td>\n",
       "      <td>5</td>\n",
       "      <td>13</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>GP</td>\n",
       "      <td>WoMan</td>\n",
       "      <td>19</td>\n",
       "      <td>U</td>\n",
       "      <td>GT3</td>\n",
       "      <td>T</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>at_home</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>15</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>GP</td>\n",
       "      <td>Man</td>\n",
       "      <td>21</td>\n",
       "      <td>U</td>\n",
       "      <td>GT3</td>\n",
       "      <td>A</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>services</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>GP</td>\n",
       "      <td>WoMan</td>\n",
       "      <td>17</td>\n",
       "      <td>U</td>\n",
       "      <td>GT3</td>\n",
       "      <td>T</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>other</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>10</td>\n",
       "      <td>12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>GP</td>\n",
       "      <td>Man</td>\n",
       "      <td>16</td>\n",
       "      <td>U</td>\n",
       "      <td>GT3</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>health</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 24 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID school    sex  age address famsize Pstatus  Medu  Fedu      Mjob  ...  \\\n",
       "0   0     GP    Man   18       U     GT3       A     1     4   at_home  ...   \n",
       "1   1     GP  WoMan   19       U     GT3       T     3     3   at_home  ...   \n",
       "2   2     GP    Man   21       U     GT3       A     4     1  services  ...   \n",
       "3   3     GP  WoMan   17       U     GT3       T     2     2     other  ...   \n",
       "4   4     GP    Man   16       U     GT3       A     1     4    health  ...   \n",
       "\n",
       "  famrel freetime goout Dalc  Walc  health  absences  G1  G2  G3  \n",
       "0      2        4     1    6     2       2        10   5  13  10  \n",
       "1      1        5     3    6     1       1         6   5  15  13  \n",
       "2      3        6     3    3     3       3         5   7  15  15  \n",
       "3      3        6     4    2     3       3         4  10  12  15  \n",
       "4      4        6     5    1     4       4         1   9  13  12  \n",
       "\n",
       "[5 rows x 24 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./4student-info-数据/student-info.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "raw",
   "id": "ee4d0063",
   "metadata": {},
   "source": [
    "注意点：\n",
    "    1.降维只能针对数值类型的数据进行降维，降维时如果数据中存在object类型的数据就需要对数据进行特征编码。\n",
    "    2.降维时要保证数据没有缺失值；"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94412e09",
   "metadata": {},
   "source": [
    "2.降维-PCA"
   ]
  },
  {
   "cell_type": "raw",
   "id": "7751f44d",
   "metadata": {},
   "source": [
    "PCA中的两个考点：\n",
    "    1.对数据进行降维，降维到多少列\n",
    "    2.对数据进行降维，保留数据多少的信息解释比。\n",
    "    3.目标列不参与降维"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d7f68baf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 特征编码\n",
    "df_oh = pd.get_dummies(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8b6759df",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 197.0048868 ,   -5.22003118,   -1.9219664 , ...,    0.62274293,\n",
       "           1.19395712,   -1.47197022],\n",
       "       [ 196.00304638,   -7.82168937,   -0.36235233, ...,    1.10123392,\n",
       "           1.65312043,   -0.83908625],\n",
       "       [ 195.00048638,   -7.511552  ,    1.89858598, ...,   -1.31600591,\n",
       "           2.69709334,    2.33967463],\n",
       "       ...,\n",
       "       [-195.00629628,    0.40965813,    0.21552603, ...,   -1.42212733,\n",
       "          -0.64033282,   -1.47604818],\n",
       "       [-196.00489429,    1.57364726,    1.04489526, ...,    1.11376621,\n",
       "          -1.65186012,   -1.80661178],\n",
       "       [-197.00027442,    4.5050727 ,   -3.3298444 , ...,   -1.95460261,\n",
       "           0.69194726,    0.6279009 ]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.decomposition import PCA\n",
    "\n",
    "model_pca = PCA(n_components=9)   # n_components 表示保留几列数据\n",
    "df_pca = model_pca.fit_transform(df_oh)\n",
    "df_pca\n",
    "# 转型  降维之后不能进行转型，因为是生成的一份新的数据，根本不知道对饮哪一列了"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc4381b2",
   "metadata": {},
   "source": [
    "3.降维-KPCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "67a54175",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.decomposition import KernelPCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b7de1e12",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 197.0048868 ,   -5.22003118,   -1.9219664 , ...,    0.62274293,\n",
       "           1.19395712,   -1.47197022],\n",
       "       [ 196.00304638,   -7.82168937,   -0.36235233, ...,    1.10123392,\n",
       "           1.65312043,   -0.83908625],\n",
       "       [ 195.00048638,   -7.511552  ,    1.89858598, ...,   -1.31600591,\n",
       "           2.69709334,    2.33967463],\n",
       "       ...,\n",
       "       [-195.00629628,    0.40965813,    0.21552603, ...,   -1.42212733,\n",
       "          -0.64033282,   -1.47604818],\n",
       "       [-196.00489429,    1.57364726,    1.04489526, ...,    1.11376621,\n",
       "          -1.65186012,   -1.80661178],\n",
       "       [-197.00027442,    4.5050727 ,   -3.3298444 , ...,   -1.95460261,\n",
       "           0.69194726,    0.6279009 ]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_pca = KernelPCA(n_components=9)\n",
    "df_kpca = model_pca.fit_transform(df_oh)\n",
    "df_kpca\n",
    "# 转型  降维之后不能进行转型，因为是生成的一份新的数据，根本不知道对饮哪一列了"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bb7602f1",
   "metadata": {},
   "source": [
    "4.降维-LDA"
   ]
  },
  {
   "cell_type": "raw",
   "id": "a92a8261",
   "metadata": {},
   "source": [
    "LDA降维时最多只能降维到n-1列，其中n指的是目标列的类别的数量。因为需要有目标列参与所以需要在降维的时候对数据进行拆分，拆分成其他列和目标列。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6e6ee768",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>alcohol</th>\n",
       "      <th>malic</th>\n",
       "      <th>ash</th>\n",
       "      <th>alcalinity</th>\n",
       "      <th>magnesium</th>\n",
       "      <th>phenols</th>\n",
       "      <th>flavanoids</th>\n",
       "      <th>nonflavanoids</th>\n",
       "      <th>proanthocyanins</th>\n",
       "      <th>color</th>\n",
       "      <th>hue</th>\n",
       "      <th>dilution</th>\n",
       "      <th>proline</th>\n",
       "      <th>particular year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>13.20</td>\n",
       "      <td>1.78</td>\n",
       "      <td>2.43</td>\n",
       "      <td>15.6</td>\n",
       "      <td>127.0</td>\n",
       "      <td>2.8</td>\n",
       "      <td>3.06</td>\n",
       "      <td>0.28</td>\n",
       "      <td>2.29</td>\n",
       "      <td>5.64</td>\n",
       "      <td>1.04</td>\n",
       "      <td>3.92</td>\n",
       "      <td>1065</td>\n",
       "      <td>22.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A</td>\n",
       "      <td>13.20</td>\n",
       "      <td>1.78</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.38</td>\n",
       "      <td>1.05</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1050</td>\n",
       "      <td>38.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>13.16</td>\n",
       "      <td>2.36</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.68</td>\n",
       "      <td>1.03</td>\n",
       "      <td>3.17</td>\n",
       "      <td>1185</td>\n",
       "      <td>26.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>14.37</td>\n",
       "      <td>1.95</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>3.45</td>\n",
       "      <td>1480</td>\n",
       "      <td>35.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A</td>\n",
       "      <td>13.24</td>\n",
       "      <td>2.59</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.32</td>\n",
       "      <td>1.04</td>\n",
       "      <td>2.93</td>\n",
       "      <td>735</td>\n",
       "      <td>35.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  type  alcohol malic   ash  alcalinity  magnesium  phenols  flavanoids  \\\n",
       "0    A    13.20  1.78  2.43        15.6      127.0      2.8        3.06   \n",
       "1    A    13.20  1.78   NaN         NaN        NaN      NaN         NaN   \n",
       "2    A    13.16  2.36   NaN         NaN        NaN      NaN         NaN   \n",
       "3    A    14.37  1.95   NaN         NaN        NaN      NaN         NaN   \n",
       "4    A    13.24  2.59   NaN         NaN        NaN      NaN         NaN   \n",
       "\n",
       "   nonflavanoids  proanthocyanins  color   hue dilution  proline  \\\n",
       "0           0.28             2.29   5.64  1.04     3.92     1065   \n",
       "1            NaN              NaN   4.38  1.05      3.4     1050   \n",
       "2            NaN              NaN   5.68  1.03     3.17     1185   \n",
       "3            NaN              NaN   7.80  0.86     3.45     1480   \n",
       "4            NaN              NaN   4.32  1.04     2.93      735   \n",
       "\n",
       "   particular year  \n",
       "0             22.0  \n",
       "1             38.0  \n",
       "2             26.0  \n",
       "3             35.0  \n",
       "4             35.0  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('./wine3.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "426eea2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df.drop(axis=1,columns='type')\n",
    "Y = df['type']\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "87eaad20",
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "could not convert string to float: ' '",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[9], line 4\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdiscriminant_analysis\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LinearDiscriminantAnalysis \u001b[38;5;28;01mas\u001b[39;00m LDA\n\u001b[0;32m      3\u001b[0m model_lda \u001b[38;5;241m=\u001b[39m LDA(n_components\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m----> 4\u001b[0m \u001b[43mmodel_lda\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43mY\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\base.py:870\u001b[0m, in \u001b[0;36mTransformerMixin.fit_transform\u001b[1;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[0;32m    867\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n\u001b[0;32m    868\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    869\u001b[0m     \u001b[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001b[39;00m\n\u001b[1;32m--> 870\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\discriminant_analysis.py:550\u001b[0m, in \u001b[0;36mLinearDiscriminantAnalysis.fit\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m    528\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, y):\n\u001b[0;32m    529\u001b[0m     \u001b[38;5;124;03m\"\"\"Fit the Linear Discriminant Analysis model.\u001b[39;00m\n\u001b[0;32m    530\u001b[0m \n\u001b[0;32m    531\u001b[0m \u001b[38;5;124;03m       .. versionchanged:: 0.19\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    548\u001b[0m \u001b[38;5;124;03m        Fitted estimator.\u001b[39;00m\n\u001b[0;32m    549\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 550\u001b[0m     X, y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    551\u001b[0m \u001b[43m        \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_min_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat64\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m    552\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    553\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclasses_ \u001b[38;5;241m=\u001b[39m unique_labels(y)\n\u001b[0;32m    554\u001b[0m     n_samples, _ \u001b[38;5;241m=\u001b[39m X\u001b[38;5;241m.\u001b[39mshape\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\base.py:596\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[1;34m(self, X, y, reset, validate_separately, **check_params)\u001b[0m\n\u001b[0;32m    594\u001b[0m         y \u001b[38;5;241m=\u001b[39m check_array(y, input_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_y_params)\n\u001b[0;32m    595\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 596\u001b[0m         X, y \u001b[38;5;241m=\u001b[39m check_X_y(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n\u001b[0;32m    597\u001b[0m     out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[0;32m    599\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m check_params\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensure_2d\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m):\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\utils\\validation.py:1074\u001b[0m, in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m   1069\u001b[0m         estimator_name \u001b[38;5;241m=\u001b[39m _check_estimator_name(estimator)\n\u001b[0;32m   1070\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1071\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mestimator_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m requires y to be passed, but the target y is None\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1072\u001b[0m     )\n\u001b[1;32m-> 1074\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[43mcheck_array\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1075\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1076\u001b[0m \u001b[43m    \u001b[49m\u001b[43maccept_sparse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maccept_sparse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1077\u001b[0m \u001b[43m    \u001b[49m\u001b[43maccept_large_sparse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maccept_large_sparse\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1078\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1079\u001b[0m \u001b[43m    \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1080\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1081\u001b[0m \u001b[43m    \u001b[49m\u001b[43mforce_all_finite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_all_finite\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1082\u001b[0m \u001b[43m    \u001b[49m\u001b[43mensure_2d\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_2d\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1083\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_nd\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nd\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1084\u001b[0m \u001b[43m    \u001b[49m\u001b[43mensure_min_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_min_samples\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1085\u001b[0m \u001b[43m    \u001b[49m\u001b[43mensure_min_features\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_min_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1086\u001b[0m \u001b[43m    \u001b[49m\u001b[43mestimator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1087\u001b[0m \u001b[43m    \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mX\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1088\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1090\u001b[0m y \u001b[38;5;241m=\u001b[39m _check_y(y, multi_output\u001b[38;5;241m=\u001b[39mmulti_output, y_numeric\u001b[38;5;241m=\u001b[39my_numeric, estimator\u001b[38;5;241m=\u001b[39mestimator)\n\u001b[0;32m   1092\u001b[0m check_consistent_length(X, y)\n",
      "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python310\\site-packages\\sklearn\\utils\\validation.py:856\u001b[0m, in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m    854\u001b[0m         array \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mastype(dtype, casting\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munsafe\u001b[39m\u001b[38;5;124m\"\u001b[39m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m    855\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 856\u001b[0m         array \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43marray\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    857\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ComplexWarning \u001b[38;5;28;01mas\u001b[39;00m complex_warning:\n\u001b[0;32m    858\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    859\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mComplex data not supported\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(array)\n\u001b[0;32m    860\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcomplex_warning\u001b[39;00m\n",
      "File \u001b[1;32mD:\\soft\\anaconda\\lib\\site-packages\\pandas\\core\\generic.py:2070\u001b[0m, in \u001b[0;36mNDFrame.__array__\u001b[1;34m(self, dtype)\u001b[0m\n\u001b[0;32m   2069\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__array__\u001b[39m(\u001b[38;5;28mself\u001b[39m, dtype: npt\u001b[38;5;241m.\u001b[39mDTypeLike \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m np\u001b[38;5;241m.\u001b[39mndarray:\n\u001b[1;32m-> 2070\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[1;31mValueError\u001b[0m: could not convert string to float: ' '"
     ]
    }
   ],
   "source": [
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n",
    "\n",
    "model_lda = LDA(n_components=2)\n",
    "model_lda.fit_transform(X,Y)\n",
    "# 转型、合并数据  "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "87880730",
   "metadata": {},
   "source": [
    "5.降维-Isomap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f45c78f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.manifold import Isomap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c089c556",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_is = Isomap(n_components=9)\n",
    "df_is = model_pca.fit_transform(df_oh)\n",
    "df_is"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9d3c9223",
   "metadata": {},
   "source": [
    "6.降维-LLE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de68a54e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.manifold import locally_linear_embedding\n",
    "df_lle = locally_linear_embedding(X=df_oh, n_neighbors=12, n_components=3)\n",
    "df_lle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "406aa861",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
